* GSS data setup for: 
* Class, Policy Attitudes and U.S. Presidential Voting in the Post-Industrial Era: The Importance of Issue Salience
* Franko & Witko
* Political Research Quarterly
* Date: 05/25/22

*******************************************************************************
***** Data setup.
*******************************************************************************


use "GSS_Class_wEGP.dta", clear


* Demographics.
* Codes 98 and 99 on age and educ are turned into missing before anything
* is derived from those variables.
replace age = . if inlist(age, 98, 99)
*gen age2 = age^2
replace educ = . if inlist(educ, 98, 99)

* Five-category education from years of schooling.
recode educ (0/8   = 1) ///
            (9/11  = 2) ///
            (12    = 3) ///
            (13/15 = 4) ///
            (16/20 = 5), generate(edu5)

* 0/1 indicators with Yes/No value labels.
recode sex  (2 = 1 "Yes") (1   = 0 "No"), generate(female)
recode race (1 = 1 "Yes") (2 3 = 0 "No"), generate(white)

* Income: zero is treated as missing; then a version in thousands and
* quintile / quartile groupings computed over the whole file.
replace realinc = . if realinc == 0
generate realinc1k = realinc / 1000
foreach q in 5 4 {
	xtile inc`q' = realinc, nquantiles(`q')
}

* Party identification shifted from 0-6 to 1-7; codes 7, 8 and 9 become missing.
recode partyid (0 = 1) (1 = 2) (2 = 3) (3 = 4) (4 = 5) (5 = 6) (6 = 7) ///
               (7 8 9 = .), generate(pid7)

  label define pid7_lab 1 "Strong dem" 2 "Not str dem" 3 "Ind, near dem" 4 "Independent" 5 "Ind, near rep" 6 "Not str rep" 7 "Strong rep"
  label values pid7 pid7_lab

* Ideology is cleaned in place: codes 0, 8 and 9 become missing.
replace polviews = . if inlist(polviews, 0, 8, 9)

* Subjective class: rename, then drop codes 0, 5, 8 and 9.
rename class_ subclass
recode subclass (0 5 8 9 = .)

* Redistribution.
* eqwlth is reversed so that 7 carries the "Govt reduce diff" label and
* 1 carries "No govt action"; codes 0, 8, 9 and 99 become missing.
recode eqwlth (0 8 9 99 = .)             ///
              (1 = 7 "Govt reduce diff") ///
              (2 = 6)                    ///
              (3 = 5)                    ///
              (4 = 4)                    ///
              (5 = 3)                    ///
              (6 = 2)                    ///
              (7 = 1 "No govt action"), generate(govineq)
* Scaled 0 to 1 version.
generate govineq01 = (govineq - 1) / 6

* Three-category collapse of the 7-point scale.
recode govineq (1 2 3 = 1 "Less govt action") ///
               (4 5   = 2 "Some govt action") ///
               (6 7   = 3 "More govt action"), generate(govineq3)

* Combine some EGP categories with small sample sizes.
* Code 7 is folded into 5, code 11 into 10, and code 12 is set to missing.
* NOTE(review): this recode reads egp10_11 while the two below read egp10_10;
* confirm both source variables are intended.
* NOTE(review): the value label below has no entry for 6, and the rules leave
* any 6 unchanged -- confirm whether 6 occurs in egp10_11.
recode egp10_11 (7  = 5)  ///
                (11 = 10) ///
                (12 = .), generate(egp11_alt)
  label define egp11alt 1 "I" 2 "II" 3 "IIIa" 4 "IIIb" 5 "IVabc" ///
  8 "V" 9 "VI" 10 "VIIab"
  label values egp11_alt egp11alt
* Variations of EGP with fewer categories.
* Codes 7 and 12 are dropped in both versions.
* NOTE(review): codes 5 and 6 are not matched by any rule, so recode would
* pass them through unchanged (a 5 would share a value with the collapsed
* 9/10/11 group in egp5) -- presumably they do not occur in egp10_10; verify.
recode egp10_10 (1 2       = 1) ///
                (3         = 2) ///
                (4         = 3) ///
                (7         = .) ///
                (8 9 10 11 = 4) ///
                (12        = .), generate(egp4)
recode egp10_10 (1 2     = 1) ///
                (3       = 2) ///
                (4       = 3) ///
                (7       = .) ///
                (8       = 4) ///
                (9 10 11 = 5) ///
                (12      = .), generate(egp5)

* One 0/1 indicator per egp5 category (egp5_1, egp5_2, ...).
tabulate egp5, generate(egp5_)


* Presidential vote choice.
* Recode each variable to two-party vote.
* Source coding: 1 = D, 2 = R.  Result: 1 = D, 0 = R, everything else missing.
* The varlist pattern pres?? requires exactly two characters after "pres",
* so it selects the election items (pres72, ..., pres16) and skips any other
* variable that merely starts with "pres" (e.g. occupational prestige
* scores, if the extract contains them).
foreach v of varlist pres?? {
	recode `v' (1=1) (2=0) (else=.), gen(dv_`v')
}

* Create single two-party vote variable using year closest to election.
tabstat dv_pres?? , by(year) format(%9.3g) nototal

gen demvote = .
gen presyear = .

* Parallel lists: the i-th survey year is the one whose vote item is used
* for the i-th election.
local elections 1972 1976 1980 1984 1988 1992 1996 2000 2004 2008 2012 2016
local surveys   1973 1977 1982 1985 1989 1993 1998 2002 2006 2010 2014 2018
local n_elect : word count `elections'

forvalues i = 1/`n_elect' {
	local elect  : word `i' of `elections'
	local survey : word `i' of `surveys'
	* Two-digit suffix used in the variable names ("72", ..., "00", ..., "16").
	local yy = substr("`elect'", 3, 2)

	* demvote: take the vote item only from the designated survey year.
	replace demvote = dv_pres`yy' if year == `survey'

	* presyear: flag every respondent with a valid answer for this election.
	* Elections are processed in ascending order, so when a respondent has
	* valid answers for several elections the most recent one wins.
	replace presyear = `elect' if dv_pres`yy' != .
}


* Race policy attitudes.
* Non-missing counts by survey year for the three source items.
tabstat affrmact helpblk natrace, by(year) statistics(n)

* Each item is reverse-coded (codes 0, 8 and 9 become missing) and then
* rescaled to run from 0 to 1.
recode affrmact (1 = 4) (2 = 3) (3 = 2) (4 = 1) ///
                (0 8 9 = .), generate(affrm)
generate affrm01 = (affrm - 1) / 3

recode helpblk (1 = 5) (2 = 4) (3 = 3) (4 = 2) (5 = 1) ///
               (0 8 9 = .), generate(helpb)
generate helpb01 = (helpb - 1) / 4

recode natrace (1 = 3) (2 = 2) (3 = 1) ///
               (0 8 9 = .), generate(imprb)
generate imprb01 = (imprb - 1) / 2


* Three-item average (missing whenever any of the three items is missing).
generate racepol01_b = (affrm01 + helpb01 + imprb01) / 3

* Two-item average without the affirmative-action item.
generate racepol01 = (helpb01 + imprb01) / 2


* Culture policy attitudes.
* Non-missing counts by survey year for the source items.
tabstat abany abdefect abnomore abhlth abpoor abrape absingle, by(year) statistics(n)
tabstat spkhomo colhomo libhomo homosex, by(year) statistics(n)

* Abortion items: all seven share the same coding, so recode them in a loop
* (1 stays 1, 2 becomes 0, codes 0/8/9 become missing).
local abor_src abany   abdefect abnomore abhlth   abpoor   abrape   absingle
local abor_new aborany abordef  aborwant aborhlth aborpoor aborrape aborsing
local n_abor : word count `abor_src'
forvalues i = 1/`n_abor' {
	local src : word `i' of `abor_src'
	local new : word `i' of `abor_new'
	recode `src' (1 = 1) (2 = 0) (0 8 9 = .), gen(`new')
}

* Share of the seven items coded 1 (missing if any item is missing).
generate abor01 = (aborany + abordef + aborwant + aborhlth + aborpoor + aborrape + aborsing) / 7

* Gay-rights items, each turned into a 0/1 indicator.  The source codes
* differ by item (1/2, 4/5, and a reversed 1/2) -- NOTE(review): confirm the
* direction of each against the GSS codebook.
recode spkhomo (1 = 1) (2 = 0) (0 8 9 = .), generate(gayspk)
recode colhomo (4 = 1) (5 = 0) (0 8 9 = .), generate(gaytch)
recode libhomo (1 = 0) (2 = 1) (0 8 9 = .), generate(gaybook)
* homosex keeps its 1-4 values; codes 0, 5, 8 and 9 become missing.
recode homosex (0 5 8 9 = .), generate(gayok)

* Rescale the 1-4 item to 0-1.
generate gayok01 = (gayok - 1) / 3

* Average of the four gay-rights measures.
generate gayr01 = (gayok01 + gayspk + gaytch + gaybook) / 4


* Overall culture index: mean of the abortion and gay-rights scales.
generate culpol01 = (abor01 + gayr01) / 2


* Region.
* Attach the state composition of each region category to the variable as notes.
note region: New England = Maine, Vermont, New Hampshire, Massachusetts, Connecticut, Rhode Island
note region: Middle Atlantic = New York, New Jersey, Pennsylvania
note region: East North Central = Wisconsin, Illinois, Indiana, Michigan, Ohio
note region: West North Central = Minnesota, Iowa, Missouri, North Dakota, South Dakota, Nebraska, Kansas
note region: South Atlantic = Delaware, Maryland, West Virginia, Virginia, North Carolina, South Carolina, Georgia, Florida, District of Columbia
note region: East South Central = Kentucky, Tennessee, Alabama, Mississippi
note region: West South Central = Arkansas, Oklahoma, Louisiana, Texas
note region: Mountain = Montana, Idaho, Wyoming, Nevada, Utah, Colorado, Arizona, New Mexico
note region: Pacific = Washington, Oregon, California, Alaska, Hawaii

* Collapse the nine region codes into four labelled groups.
recode region (1 2   = 1 "Northeast") ///
              (3 4   = 2 "Midwest")   ///
              (5 6 7 = 3 "South")     ///
              (8 9   = 4 "West"), generate(region2)

* 0/1 indicator for the South group.
recode region2 (3     = 1) ///
               (1 2 4 = 0), generate(south)


* Add numeric values to labels.
numlabel , add


* Find years where variables are non-missing.
tabstat demvote govineq01 culpol01 racepol01 subclass egp5_1-egp5_5 inc5 edu5 pid7 polviews white age female south, by(year)

* Sample restriction used below: surveys from 1977 onward, respondents 18+.
* Stata treats missing as larger than any number, so "age >= 18" on its own
* is TRUE when age is missing.  The explicit !missing(age) keeps respondents
* with unknown age out of the "18 and older" sample.

* Summary statistics.
* Save results to create tables later.
estpost sum demvote govineq01 culpol01 racepol01 subclass egp5_1-egp5_5 inc5 edu5 pid7 polviews white age female south if year >= 1977 & age >= 18 & !missing(age)
estimates store sum_gss

* Missing value patterns for class vars.
misstable pattern subclass inc5 edu5 egp5_1-egp5_5 if year >= 1977 & age >= 18 & !missing(age), asis

* Class vars correlations.
spearman subclass inc5 edu5 egp5_1-egp5_5 if year >= 1977 & age >= 18 & !missing(age), pw

* Save data for descriptive plotting.
save GSS_Class_wEGP_clean.dta, replace


